This file provides code to analyse a subset of a random 1/3 of participants for each lab.

1 Load Data

library(psych) # for SPSS-style PCA
library(paran) # for parallel analyses
## Loading required package: MASS
library(GPArotation) # for robustness checks
library(kableExtra) # for nice tables
library(tidyverse) # for data cleaning
## ── Attaching packages ───────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()      masks psych::%+%()
## ✖ ggplot2::alpha()    masks psych::alpha()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ dplyr::select()     masks MASS::select()
# Render NA cells as empty strings in all kable tables
options(knitr.kable.NA = '')
# Show code in the rendered document, but suppress warnings/messages
knitr::opts_chunk$set(echo = TRUE, 
                      warning = FALSE, 
                      message = FALSE)

# Record the R version used (for reproducibility of the report)
R.version.string
## [1] "R version 3.6.0 (2019-04-26)"
# Fix the RNG seed so the random 1/3 subsample below is reproducible
set.seed(8675309)

1.1 Simulate Study Data (for Stage 1 RR)

See https://osf.io/87rbg/ for Stage 1 RR code. The code below is modified from the original to account for a different raw data structure and to add additional tables and graphs. All analysis code is identical.

1.2 Load Study Data (SUBSET)

Load study data and demographic questionnaires from the data folder.

# Session metadata, questionnaire responses, and trial-level experiment data
session <- read_csv("data/session.csv")
dat_quest <- read_csv("data/quest_data.csv")
dat_exp <- read_csv("data/exp_data.csv")

# reshape questionnaire data to make wide
# One row per (session, user): if a question was answered more than once,
# keep only the earliest answer (first row per group after sorting by
# endtime), then spread to one column per question name.
quest <- dat_quest %>%
  select(session_id, endtime, user_id, q_name, dv) %>%
  group_by(session_id, user_id, q_name) %>%
  arrange(endtime) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  spread(q_name, dv, convert = TRUE) # convert = TRUE re-types the dv columns

Join experiment and questionnaire data

# Build the raw ratings table:
#  - sample a random 1/3 of sessions within each lab (proj_name); seed set above
#  - join trial-level experiment data and wide questionnaire data
#  - parse language/trait/block out of exp_name and country/lab out of proj_name
ratings_raw <- session %>%
  filter(user_status %in% c("guest", "registered")) %>%
  group_by(proj_name) %>%
  sample_frac(1/3) %>%
  ungroup() %>%
  left_join(dat_exp, by = c("user_id", "session_id")) %>%
  # redundant with the filter above (left_join cannot change user_status),
  # but harmless; kept to match the registered pipeline
  filter(user_status %in% c("guest", "registered")) %>%
  # exp_name has the form "psa_language_trait_block"
  separate(exp_name, c("psa", "language", "trait", "block"), 
           sep = "_") %>%
  select(-psa) %>%
  # proj_name has the form "psa_lang_lab1[_lab2]"; lab2 may be absent
  separate(proj_name, c("psa", "lang", "lab1", "lab2"),
           sep = "_", fill = "right") %>%
  filter(lab1 != "test") %>%  # drop test/demo projects
  unite(lab_id, c("lab1", "lab2")) %>%
  # FIX: was `select(-psa, lang)`, which only dropped psa (a leading negative
  # selection keeps everything else, so `lang` was retained, not dropped).
  # Both helper columns are meant to be removed here; the explicit select
  # below never kept lang, so the written CSV is unchanged.
  select(-psa, -lang) %>%
  left_join(quest, by = c("session_id", "user_id")) %>%
  select(language, user_id = session_id, trait, 
         stim_id = trial_name, 
         order, rt, rating = dv,
         country, sex, age, ethnicity, lab = lab_id, block) %>%
  # expand 3-letter trait codes to full trait names
  mutate(trait = recode(trait,
                        "Res" = "responsible",
                        "Wei" = "weird",
                        "Old" = "old",
                        "Tru" = "trustworthy",
                        "Dom" = "dominant",
                        "Emo" = "emostable",
                        "Agg" = "aggressive",
                        "Car" = "caring",
                        "Int" = "intelligent",
                        "Unh" = "unhappy",
                        "Soc" = "sociable",
                        "Mea" = "mean",
                        "Con" = "confident",
                        "Att" = "attractive"
  ))

write_csv(ratings_raw, "data/ratings_raw_subset.csv")

Load subset of 1/3 of data from each lab.

# Reload the saved subset so later chunks can run without re-sampling
ratings_raw <- read_csv("data/ratings_raw_subset.csv")

1.3 Load Auxiliary Data

Data on regions and stimuli.

1.3.1 Load Region Data

# Country-code to world-region lookup table
regions <- read_csv("data/regions.csv")

1.3.2 Load Stimulus Info

# Stimulus (face) metadata; recode single-letter Race/Gender codes
# into readable labels
stim_info <- read_csv("data/psa_cfd_faces.csv") %>%
  mutate(ethnicity = recode(Race, "A" = "asian", "B" = "black", "L" = "latinx", "W" = "white"),
         gender = recode(Gender, "M" = "male", "F" = "female")
  )

# Summary table: stimulus counts and age stats by ethnicity and gender
stim_info %>%
  group_by(ethnicity, gender) %>%
  summarise(
    n = n(),
    mean_age = round(mean(Age), 2),
    sd_age = round(sd(Age), 2)
  ) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
ethnicity gender n mean_age sd_age
asian female 15 26.15 3.33
asian male 15 26.40 3.21
black female 15 27.00 3.51
black male 15 28.07 4.27
latinx female 15 25.27 2.42
latinx male 15 26.31 4.00
white female 15 25.77 3.03
white male 15 26.06 4.46
# Stimulus-set descriptives reported inline in the paragraph below
stim_n_male <- with(stim_info, sum(gender == "male"))
stim_n_female <- with(stim_info, sum(gender == "female"))
mean_age <- round(mean(stim_info$Age), 2)
sd_age <- round(sd(stim_info$Age), 2)
min_age <- min(stim_info$Age)
max_age <- max(stim_info$Age)

Stimuli in our study will be an open-access, full-color, face image set consisting of 60 men and 60 women (mean age=26.38 years, SD=3.57 years, range=18.73 to 34.93 years), taken under standardized photographic conditions (Ma et al., 2015).

1.3.3 Load O&T 2008 Data

# read original OT data and get into same format as data_agg will be

# All trait names in the new data, excluding "old" (not part of the
# 13-trait model); used to pick matching columns from the O&T data below.
traits <- ratings_raw %>%
  filter(trait != "old", !is.na(trait)) %>%
  arrange(trait) %>%
  pull(trait) %>%
  unique()
  
# Oosterhof & Todorov (2008) Karolinska judgments, relabeled to match this
# study's column names. NOTE(review): `traits` is an external character
# vector passed to select(); tidyselect resolves it to column names here,
# but newer tidyselect versions prefer all_of(traits) to avoid ambiguity.
ot_data <- readxl::read_excel("data/Karolinska_14trait_judgmentswithlabels.xls") %>%
  mutate(region = "(Oosterhof & Todorov, 2008)") %>%
  rename(stim_id = `Todorov Label`,
         emostable = `emotionally stable`) %>%
  select(region, stim_id, traits)

1.4 Data Processing

1.4.1 Join Data

# Split lab into country code + lab number, attach world regions, and drop
# the "old" trait (age ratings are not part of the 13-trait model)
ratings <- ratings_raw %>%
  rename(qcountry = country) %>% # keep the questionnaire country separate from the lab country
  separate(lab, c("country", "lab")) %>%
  left_join(regions, by = "country") %>%
  filter(trait != "old")

1.4.2 Graph distributions for trait by region

# plot styles: shared ggplot theme used by all figures in this report
plot_bg <- "white"
plot_fg <- "black"
PSA_theme <- theme(
    plot.background = element_rect(fill = plot_bg, color = NA),
    panel.background = element_rect(fill = NA, color = "grey"),
    legend.background = element_rect(fill = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    text = element_text(color = plot_fg, size = 15),
    axis.text = element_text(color = plot_fg, size = 10),
    strip.text.y = element_text(angle = 0, hjust = 0)
  )

# Rating histograms: one panel per region (rows) x trait (columns)
ggplot(ratings, aes(rating, fill = trait)) +
  geom_histogram(binwidth = 1, color = "grey", show.legend = F) +
  facet_grid(region~trait, space = "free") +
  scale_x_continuous(breaks = 1:9) +
  PSA_theme

1.5 Data checks

# One row per participant (x demographics x trait): number of trials
# completed and number of distinct stimuli rated, for the checks below
part <- ratings %>%
  group_by(user_id, sex, age, country, language, trait, region, lab) %>%
  summarise(trials = n(),
            stim_n = n_distinct(stim_id)) %>%
  ungroup()

1.5.1 How many participants completed at least one rating for each of 120 stimuli

# Count participants who rated all 120 stimuli vs fewer, by region
part %>% 
  mutate(n120 = ifelse(stim_n == 120, "rated all 120", "rated < 120")) %>%
  count(region, n120) %>%
  spread(n120, n) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region rated < 120 rated all 120
Africa 6 185
Asia 14 273
Australia & New Zealand 3 373
Central America & Mexico 6 165
Eastern Europe 34 271
Middle East 3 147
Scandinavia 16 229
South America 10 447
UK 1 127
USA & Canada 28 1188
Western Europe 10 652

1.5.2 Participants who did not complete exactly 240 trials

# Categorize trial counts relative to the expected 240 (120 stimuli x 2 blocks).
# case_when evaluates in order, so "< 120" is matched before "120-239".
part %>% 
  mutate(n240 = case_when(
    trials == 240 ~ "rated 240", 
    trials > 240 ~ "rated > 240",
    trials < 120 ~ "rated < 120",
    trials < 240 ~ "rated 120-239"
  )) %>%
  count(region, n240) %>%
  spread(n240, n, fill = 0) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region rated < 120 rated > 240 rated 120-239 rated 240
Africa 4 2 32 153
Asia 14 4 73 196
Australia & New Zealand 3 1 72 300
Central America & Mexico 5 0 61 105
Eastern Europe 33 0 64 208
Middle East 3 2 23 122
Scandinavia 15 0 48 182
South America 10 5 75 367
UK 1 0 16 111
USA & Canada 27 0 132 1057
Western Europe 9 1 55 597

1.5.3 Participants with low-variance responses in block 1

identical_rating_threshold <- 0.75 * 120 # use this for registered analyses

# Flag low-variance participants: those whose single most common rating in
# block 1 accounts for >= 75% of their 120 block-1 trials
inv_participants <- ratings %>%
  filter(block == 1) %>%
  count(user_id, region, trait, rating) %>%
  group_by(user_id, region, trait) %>%
  filter(n == max(n)) %>% # find most common rating for each P
  ungroup() %>%
  filter(n >= identical_rating_threshold) # select Ps who gave the same rating to >= 75% of stimuli

# Tabulate low-variance participants by trait (rows) and region (columns),
# with a row-wise TOTAL column
inv <- inv_participants %>%
  count(region, trait) %>%
  spread(region, n, fill = 0) %>%
  mutate(TOTAL = rowSums(select_if(., is.numeric), na.rm = T))

# Column totals appended as a final "TOTAL" row.
# FIX: the original started this pipe with `group_by(inv)` with no grouping
# variables, a no-op that obscured intent; summarise over the table directly.
inv_total <- inv %>%
  summarise_if(is.numeric, sum, na.rm = T) %>%
  mutate(trait = "TOTAL")

bind_rows(inv,inv_total) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
trait Africa Asia Australia & New Zealand Central America & Mexico Eastern Europe Middle East Scandinavia South America UK USA & Canada Western Europe TOTAL
aggressive 2 2 6 0 1 1 1 1 0 7 6 27
attractive 0 1 2 2 0 1 2 6 0 6 2 22
caring 0 0 1 0 0 0 0 3 0 6 1 11
confident 2 1 0 2 0 0 0 2 0 0 2 9
dominant 3 0 0 0 1 0 0 1 0 2 3 10
emostable 1 1 1 0 1 0 1 1 2 4 2 14
intelligent 0 1 1 1 1 1 1 0 2 5 2 15
mean 0 2 3 1 2 0 2 3 1 10 5 29
responsible 0 1 0 0 1 0 2 2 1 7 3 17
sociable 0 0 0 1 0 0 0 1 0 6 2 10
trustworthy 0 3 1 2 2 0 1 3 0 1 5 18
unhappy 1 2 4 0 1 0 2 2 0 6 2 20
weird 4 2 3 1 5 0 2 4 1 13 4 39
TOTAL 13 16 22 10 15 3 14 29 7 73 39 241

1.5.4 Participants with no region

# Sanity check: participants whose country did not match a region (expect none)
part %>% 
  filter(is.na(region)) %>%
  select(user_id, country, lab)
## # A tibble: 0 x 3
## # … with 3 variables: user_id <dbl>, country <chr>, lab <chr>

1.5.5 Remove excluded data and average ratings

# Apply the registered exclusions, then average each participant's two
# ratings (one per block) of each stimulus
data <- ratings %>%
  group_by(user_id, trait) %>%
  filter(
    # did not complete 1+ ratings for each of 120 stimuli
    dplyr::n_distinct(stim_id) == 120,      
    !is.na(region)   # did not specify region (none expected)
  ) %>%
  # NOTE(review): anti_join matches on user_id only, so a participant flagged
  # as low-variance for one trait is excluded entirely -- confirm intended
  anti_join(inv_participants, by = "user_id") %>% # exclude Ps with low variance
  ungroup() %>%
  group_by(user_id, age, sex, ethnicity, language, lab, country, region, trait, stim_id) %>%
  summarise(rating = mean(rating)) %>% # average ratings across 2 
  ungroup()

write_csv(data, "data/psa001_ind.csv")

1.6 Participant Demographics

# Number of participants per questionnaire language
# (one row per unique participant-language pair, then counted by language)
data %>%
  distinct(user_id, language) %>%
  count(language) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
language n
EL 31
ENG 1963
ES-PE 40
FAS 15
FR-BE 27
FR-CH 37
FRE 70
GER 148
HU 53
ITA 54
NL 76
NOR 110
POL 11
PT 27
PT-BR 66
RO 9
RU 76
SLO 83
SPA 555
SRP 24
SV 67
THA 29
TUR 96
ZH-CN 36
ZH-S 115
# Participant counts per region with a TOTAL row, rendered and saved as HTML
by_region <- data %>%
  group_by(user_id, region) %>%
  summarise() %>% # one row per participant x region
  ungroup() %>%
  group_by(region) %>%
  summarise(n = n()) %>%
  add_row(region = "TOTAL", n = n_distinct(data$user_id)) %>%
  knitr::kable("html") %>%
  kable_styling("striped")

save_kable(by_region, "figures/n_by_region.html")
           
by_region
region n
Africa 173
Asia 257
Australia & New Zealand 351
Central America & Mexico 155
Eastern Europe 256
Middle East 144
Scandinavia 217
South America 418
UK 120
USA & Canada 1114
Western Europe 613
TOTAL 3818

1.6.1 Age and sex distribution per region

# Age distribution in 5-year bins by sex, one facet per region,
# with a region-level n label in each panel
data %>%
  group_by(user_id, sex, age, region) %>%
  summarise() %>% # one row per participant
  ungroup() %>%
  group_by(region) %>%
  mutate(n = n()) %>% # per-region participant count for the text label
  ungroup() %>%
  ggplot(aes(as.numeric(age), fill = sex)) +
  geom_histogram(binwidth = 5, color = "grey") +
  geom_text(aes(x=0, y=5, label = paste0("n=",n)), color = "black") +
  labs(title="", y="", x="Participant age in 5-year bins") +
  facet_grid(region~., scales="free_y") +
  PSA_theme

1.6.2 Participants per trait per region

# Participants per trait per region; the horizontal line marks the
# minimum target of 15 participants per trait
data %>%
  group_by(trait, region) %>%
  summarise(n = n_distinct(user_id)) %>%
  ggplot(aes(trait, n)) +
  geom_col(aes(fill = trait), show.legend = F) +
  geom_hline(yintercept = 15) +
  facet_grid(region~., scale = "free") +
  labs(title="", x="", y="Participants per trait per region") +
  theme( axis.text.x = element_text(angle = -45, hjust = 0) ) + 
  PSA_theme

ggsave("figures/participants_per_trait_per_region.png", width = 15, height = 8)

1.6.3 Participants per lab

# Post-exclusion participant counts per lab (country_lab), grouped by region
labs <- data %>%
  unite(lab, country, lab) %>%
  group_by(region, lab, user_id) %>%
  summarise() %>% # one row per participant x lab
  ungroup() %>%
  count(region, lab) %>%
  arrange(region, lab)

write_csv(labs, "data/labs_post_exclusions.csv")

knitr::kable(labs) %>%
  kable_styling("striped")
region lab n
Africa KEN_001 57
Africa KEN_002 59
Africa NGA_001 15
Africa RSA_001 16
Africa RSA_002 26
Asia CHN_001 17
Asia CHN_006 33
Asia CHN_007 20
Asia CHN_014 29
Asia MAS_001 23
Asia MAS_002 18
Asia MAS_003 15
Asia MAS_004 36
Asia TAI_001 12
Asia TAI_002 25
Asia THA_001 29
Australia & New Zealand AUS_004 27
Australia & New Zealand AUS_005 29
Australia & New Zealand AUS_006 63
Australia & New Zealand AUS_007 84
Australia & New Zealand AUS_008 36
Australia & New Zealand AUS_011 28
Australia & New Zealand AUS_014 29
Australia & New Zealand NZL_001 6
Australia & New Zealand NZL_002 49
Central America & Mexico ECU_001 35
Central America & Mexico MEX_002 65
Central America & Mexico MEX_003 25
Central America & Mexico SLV_001 30
Eastern Europe HUN_001 53
Eastern Europe POL_001 11
Eastern Europe ROU_001 9
Eastern Europe RUS_005 76
Eastern Europe SRB_002 24
Eastern Europe SVK_001 25
Eastern Europe SVK_002 58
Middle East IRI_001 15
Middle East TUR_001 26
Middle East TUR_003 35
Middle East TUR_007 4
Middle East TUR_009 35
Middle East UAE_001 29
Scandinavia DNK_001 28
Scandinavia FIN_001 13
Scandinavia NOR_001 44
Scandinavia NOR_002 39
Scandinavia NOR_003 23
Scandinavia NOR_004 16
Scandinavia SWE_004 20
Scandinavia SWE_005 15
Scandinavia SWE_006 19
South America ARG_001 30
South America BRA_001 37
South America BRA_003 29
South America CHI_001 52
South America CHI_003 28
South America CHI_004 26
South America CHI_005 31
South America COL_003 16
South America COL_004 129
South America PER_001 21
South America PER_002 19
UK UK_001 11
UK UK_005 35
UK UK_006 15
UK UK_011 20
UK UK_018 11
UK UK_022 11
UK UK_024 17
USA & Canada CAN_001 13
USA & Canada CAN_008 23
USA & Canada CAN_015 27
USA & Canada CAN_017 40
USA & Canada CAN_018 104
USA & Canada PSA_001 27
USA & Canada PSA_002 118
USA & Canada USA_001 32
USA & Canada USA_003 27
USA & Canada USA_005 13
USA & Canada USA_011 11
USA & Canada USA_014 21
USA & Canada USA_020 65
USA & Canada USA_025 30
USA & Canada USA_026 26
USA & Canada USA_030 34
USA & Canada USA_031 28
USA & Canada USA_033 17
USA & Canada USA_036 14
USA & Canada USA_038 29
USA & Canada USA_039 41
USA & Canada USA_042 12
USA & Canada USA_050 52
USA & Canada USA_051 47
USA & Canada USA_054 32
USA & Canada USA_055 37
USA & Canada USA_065 19
USA & Canada USA_067 7
USA & Canada USA_075 41
USA & Canada USA_083 5
USA & Canada USA_113 41
USA & Canada USA_114 32
USA & Canada USA_115 16
USA & Canada USA_116 17
USA & Canada USA_117 16
Western Europe AUT_001 34
Western Europe AUT_002 27
Western Europe AUT_005 17
Western Europe BEL_001 27
Western Europe ESP_001 35
Western Europe ESP_005 39
Western Europe ESP_006 14
Western Europe FRA_003 25
Western Europe FRA_004 15
Western Europe FRA_005 9
Western Europe FRA_006 21
Western Europe GER_011 16
Western Europe GER_012 18
Western Europe GER_015 31
Western Europe GER_017 21
Western Europe GRE_002 31
Western Europe ITA_001 32
Western Europe ITA_003 22
Western Europe NED_008 39
Western Europe NED_009 76
Western Europe POR_001 27
Western Europe SUI_003 37

2 Analyses

2.1 Main Analysis

First, we will calculate the average rating for each face separately for each of the 13 traits. Like Oosterhof and Todorov (2008), we will then subject these mean ratings to principal component analysis with orthogonal components and no rotation. Using the criteria reported in Oosterhof and Todorov’s (2008) paper, we will retain and interpret the components with an Eigenvalue > 1.

2.1.1 Calculate Alphas

# takes a long time, so saves the results and loads from a file in the next chunk if set to eval = FALSE
data_alpha <- data %>%
  select(user_id, region, stim_id, rating, trait) %>%
  spread(stim_id, rating, sep = "_") %>%
  group_by(trait, region) %>%
  nest() %>%
  mutate(alpha = map(data, function(d) {
    if (dim(d)[1] > 2) {
      # calculate cronbach's alpha
      subdata <- d %>%
        as_tibble() %>%
        select(-user_id) %>%
        t()

      capture.output(suppressWarnings(a <- psych::alpha(subdata)))
      a$total["std.alpha"] %>% pluck(1) %>% round(3)
    } else {
      NA
    }
  })) %>%
  select(-data) %>%
  unnest(alpha) %>%
  ungroup()

saveRDS(data_alpha, file = "data/alphas.RDS")
data_alpha <- readRDS("data/alphas.RDS")

# Combine per-cell participant counts with alphas; reformat region labels
# (line break after "&") and reverse factor order for the plot's y axis
n_alpha <- data %>%
  select(user_id, region, trait) %>%
  distinct() %>%
  count(region, trait) %>%
  left_join(data_alpha, by = c("region", "trait")) %>%
  mutate(
    trait = as.factor(trait),
    region = str_replace(region, " (and|&) ", " &\n"),
    region = as.factor(region),
    region = factor(region, levels = rev(levels(region)))
  )

# HTML table of alpha and n per region x trait (saved, not displayed)
n_alpha %>%
  mutate(stat = paste("α =", alpha, "<br>n =", n)) %>%
  select(Region = region, stat, trait) %>%
  spread(trait, stat) %>%
  knitr::kable("html", escape = FALSE) %>%
  column_spec(2:14, width = "7%") %>%
  kable_styling("striped", font_size = 9) %>%
  save_kable("figures/alpha.html")
# Heatmap version: cells with alpha >= .7 are highlighted
ggplot(n_alpha) +
  geom_tile(aes(trait, region, fill=alpha >=.7), 
           color = "grey20", show.legend = F) +
  geom_text(aes(trait, region, label=sprintf("α = %0.2f\nn = %.0f", alpha, n)), color = "black", size = 5) +
  scale_y_discrete(drop=FALSE) +
  scale_x_discrete(position = "top") +
  labs(x="", y="", title="") +
  scale_fill_manual(values = c("white", "red")) +
  PSA_theme

ggsave("figures/alphas.png", width = 18, height = 10)

2.1.2 Calculate Aggregate Scores

# Mean rating per stimulus for each trait within region, one trait per
# column; this is the input to the PCA/EFA below
data_agg <- data %>%
  group_by(region, trait, stim_id) %>%
  summarise(rating = mean(rating)) %>%
  ungroup() %>%
  spread(trait, rating)
# Density of aggregate scores per region x trait
data_agg %>%
  gather("trait", "rating", aggressive:weird) %>%
  ggplot(aes(rating, fill = trait)) +
  geom_density(show.legend = F) +
  facet_grid(region~trait) +
  PSA_theme

ggsave("figures/agg_scores.png", width = 15, height = 8)

2.1.3 Principal Component Analysis (PCA)

The number of components to extract was determined using eigenvalues > 1 for each world region. PCA was conducted using the psych::principal() function with rotate="none".

# function to calculate PCA

# SPSS-style PCA for one region's aggregate ratings (following Oosterhof &
# Todorov): retain components with eigenvalue > 1, no rotation. Returns a
# long data frame of trait loadings plus variance-accounted statistics,
# with columns rowname / type / pc / loading.
psa_pca <- function(d) {
  # drop the stimulus id and any trait columns that are entirely missing
  trait_cols <- d %>%
    select(-stim_id) %>%
    select_if(colSums(!is.na(.)) > 0)

  # eigenvalue-greater-than-one rule on the correlation matrix
  eigenvalues <- eigen(cor(trait_cols))$values
  n_components <- sum(eigenvalues > 1)

  pca <- principal(trait_cols, nfactors = n_components, rotate = "none")

  # variance-accounted rows, tagged so they can be told apart from loadings
  variance_stats <- pca$Vaccounted %>%
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "stat")

  # trait loadings stacked on top of the variance stats, then reshaped long
  combined <- unclass(pca$loadings) %>%
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "trait") %>%
    bind_rows(variance_stats)

  # gather every component column (all but the first and last columns)
  gather(combined, "pc", "loading", 2:(ncol(combined) - 1))
}
# Run the PCA separately for each region (plus the original O&T 2008 data)
# and stack the long-format results; relabel "PC1" etc. as "Component 1"
pca_analyses <- data_agg %>%
  bind_rows(ot_data) %>%
  group_by(region) %>%
  nest() %>%
  mutate(pca = map(data, psa_pca)) %>%
  select(-data) %>%
  unnest(pca) %>%
  ungroup() %>%
  mutate(pc = str_replace(pc, "PC", "Component "))

2.1.3.1 Number of Components (and proportion variance) by region

# Number of retained components (nPCs) and proportion of variance
# explained by each component, per region
pca_analyses %>%
  filter(rowname == "Proportion Var") %>%
  group_by(region) %>%
  mutate(nPCs = n()) %>%
  ungroup() %>%
  spread(pc, loading) %>%
  select(-rowname, -type) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region nPCs Component 1 Component 2 Component 3
(Oosterhof & Todorov, 2008) 2 0.633 0.183
Africa 3 0.426 0.141 0.097
Asia 3 0.596 0.151 0.097
Australia & New Zealand 3 0.586 0.168 0.096
Central America & Mexico 3 0.470 0.153 0.078
Eastern Europe 3 0.579 0.148 0.090
Middle East 3 0.433 0.203 0.110
Scandinavia 3 0.580 0.174 0.088
South America 3 0.544 0.200 0.080
UK 3 0.564 0.146 0.106
USA & Canada 3 0.640 0.166 0.081
Western Europe 3 0.621 0.191 0.083

2.1.3.2 Trait Loadings by Region and Component

# order traits by P1 loading if loads positively on P1, or by -P2 loading otherwise
# (ordering is taken from the original O&T 2008 solution so all panels match)
trait_order <- pca_analyses %>%
  filter(region == "(Oosterhof & Todorov, 2008)", type == "trait") %>%
  spread(pc, loading) %>% 
  arrange(ifelse(`Component 1`>0,`Component 1`,-`Component 2`)) %>% 
  pull(rowname)

# Proportion-of-variance values, rounded for the plot annotations below
pca_prop_var <- pca_analyses %>%
  filter(rowname == "Proportion Var") %>%
  select(-rowname, -type) %>%
  mutate(loading = round(loading, 2))

# Heatmap of trait loadings per component, faceted by region; the extra
# geom_text row (y = 14) shows each component's proportion of variance
pca_analyses %>%
  filter(type == "trait") %>%
  select(-type) %>%
  mutate(
    trait = as.factor(rowname),
    trait = factor(trait, levels = c(trait_order, "Prop.Var")),
    loading = round(loading, 2)
  ) %>%
  ggplot() +
  geom_tile(aes(pc, trait, fill=loading), show.legend = F) +
  geom_text(aes(pc, trait, label=sprintf("%0.2f", loading)), color = "black") +
  geom_text(data = pca_prop_var, aes(pc, y = 14, label=sprintf("%0.2f", loading)), color = "black") +
  scale_y_discrete(drop=FALSE) +
  scale_x_discrete(position = "top") + 
  scale_fill_gradient2(low = "dodgerblue", mid = "grey90", high = "#FF3333", limits=c(-1.1, 1.1)) +
  facet_wrap(~region, scales = "fixed", ncol = 4) +
  labs(x = "", y = "", title="") +
  PSA_theme

ggsave("figures/PCA_loadings.png", width = 15, height = 10)

2.1.3.3 Replication Criteria (PCA)

Oosterhof and Todorov’s valence-dominance model will be judged to have been replicated in a given world region if the first two components both have Eigenvalues > 1, the first component (i.e., the one explaining more of the variance in ratings) is correlated strongly (loading > .7) with trustworthiness and weakly (loading < .5) with dominance, and the second component (i.e., the one explaining less of the variance in ratings) is correlated strongly (loading > .7) with dominance and weakly (loading < .5) with trustworthiness. All three criteria need to be met to conclude that the model was replicated in a given world region.

# Apply the registered replication criteria: Component 1 loads > .7 on
# trustworthy and < .5 on dominant; Component 2 loads > .7 on dominant
# and < .5 on trustworthy
pca_rep <- pca_analyses %>%
  filter(
    type == "trait", 
    rowname %in% c("trustworthy", "dominant"),
    pc %in% c("Component 1", "Component 2")
  ) %>%
  select(-type) %>%
  mutate(rowname = paste(pc, rowname)) %>%
  select(-pc) %>%
  spread(rowname, loading) %>%
  rename(Region = region) %>%
  mutate(Replicated = ifelse(
    `Component 1 dominant` < .5 & `Component 1 trustworthy` > .7 & 
    `Component 2 dominant` > .7 & `Component 2 trustworthy` < .5,
    "Yes", "No"
  )) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html", col.names = c("Region", "Dominant", "Trustworthy", "Dominant", "Trustworthy", "Replicated")) %>%
  add_header_above(c(" " = 1, "Component 1" = 2, "Component 2" = 2, "  " = 1)) %>%
  kable_styling("striped")

save_kable(pca_rep, "figures/PCA_rep_criteria.html")

pca_rep
Component 1
Component 2
Region Dominant Trustworthy Dominant Trustworthy Replicated
(Oosterhof & Todorov, 2008) -0.244 0.941 0.929 -0.060 Yes
Africa 0.241 0.896 0.586 -0.041 No
Asia 0.520 0.811 0.721 0.111 No
Australia & New Zealand 0.368 0.916 0.848 -0.083 Yes
Central America & Mexico -0.038 0.728 0.868 -0.008 Yes
Eastern Europe 0.530 0.885 0.757 -0.063 No
Middle East 0.530 0.744 0.737 -0.331 No
Scandinavia 0.350 0.894 0.880 -0.154 Yes
South America 0.260 0.874 0.883 0.011 Yes
UK 0.258 0.895 0.894 -0.059 Yes
USA & Canada 0.369 0.948 0.830 -0.111 Yes
Western Europe 0.134 0.947 0.909 -0.163 Yes

2.1.4 Factor Congruence (PCA)

This analysis determines the congruence between the components from Oosterhof & Todorov (2008) and the components in each world region, using the psych::factor.congruence function. Congruence is labeled “not similar” for values < 0.85, “fairly similar” for values between 0.85 and 0.95, and “equal” for values >= 0.95.

# get loadings for original O&T2008
# (wide matrix of trait loadings, traits as rownames, used as the
#  comparison target for factor congruence below)
ot2008_pca_loadings <- pca_analyses %>%
  filter(region == "(Oosterhof & Todorov, 2008)", type == "trait") %>%
  select(-region, -type) %>%
  spread(pc, loading) %>%
  column_to_rownames()

# run factor congruence for each region  
# Compare each region's first two components against the O&T 2008 solution.
# Traits are sorted by name so rows align with ot2008_pca_loadings.
fc_pca <- pca_analyses %>%
  filter(type == "trait", region != "(Oosterhof & Todorov, 2008)") %>%
  select(-type) %>%
  spread(pc, loading) %>%
  group_by(region) %>%
  nest() %>%
  mutate(fc = map(data, function(d) {
    loadings <- d %>%
      as.data.frame() %>%
      select(rowname, `Component 1`, `Component 2`) %>%
      arrange(rowname) %>%
      column_to_rownames()
    
    psych::factor.congruence(loadings, 
                             ot2008_pca_loadings, 
                             digits = 4) %>%
      as.data.frame() %>%
      rownames_to_column(var = "regionPC")
  })) %>%
  select(-data) %>%
  unnest(fc) %>%
  ungroup()

# One row per region: the congruence coefficient and its similarity label
# for each matched component (region PC1 vs O&T PC1, PC2 vs PC2).
# FIX: the original kable header labeled the coefficient columns "Loading",
# but they contain factor-congruence coefficients, not loadings.
pc_fc_table <- fc_pca %>%
  gather(origPC, congruence, `Component 1`:`Component 2`) %>%
  mutate(sig = case_when(
           congruence < .85 ~ "not similar",
           congruence < .95 ~ "fairly similar",
           congruence >= .95 ~ "equal"
         ),
         congruence = sprintf("%0.3f", congruence)) %>%
  filter(regionPC == origPC) %>% # keep only matched component pairs
  select(region, PC = regionPC, congruence, sig) %>%
  gather(k, v, congruence, sig) %>%
  unite(PC, PC, k, remove = T) %>%
  spread(PC, v) %>%
  knitr::kable("html", digits = 3, align = 'lrlrl', escape = F,
               col.names = c("Region", "Congruence", "Similarity", "Congruence", "Similarity")) %>%
  add_header_above(c(" " = 1, "Component 1" = 2, "Component 2" = 2)) %>%
  kable_styling("striped")

save_kable(pc_fc_table, "figures/PCA_factor_congruence.html")

pc_fc_table
Component 1
Component 2
Region Loading Congruence Loading Congruence
Africa 0.963 equal 0.892 fairly similar
Asia 0.960 equal 0.694 not similar
Australia & New Zealand 0.972 equal 0.954 equal
Central America & Mexico 0.990 equal 0.961 equal
Eastern Europe 0.956 equal 0.947 fairly similar
Middle East 0.928 fairly similar 0.812 not similar
Scandinavia 0.975 equal 0.969 equal
South America 0.977 equal 0.924 fairly similar
UK 0.980 equal 0.979 equal
USA & Canada 0.975 equal 0.953 equal
Western Europe 0.988 equal 0.944 fairly similar

2.2 Robustness Checks

2.2.1 Exploratory Factor Analysis (EFA)

The number of factors to extract was determined using parallel analysis (paran::paran()) for each world region. EFA was conducted using the psych::fa() function with all default options.

# function to calculate EFA

# Exploratory factor analysis for one region's aggregate ratings: the number
# of factors is chosen by Horn's parallel analysis (paran), then psych::fa()
# is run with defaults. Returns a long data frame of trait loadings plus
# variance-accounted statistics (columns rowname / type / mr / loading).
psa_efa <- function(d) {
  # drop the stimulus id and any trait columns that are entirely missing
  trait_cols <- d %>%
    select(-stim_id) %>%
    select_if(colSums(!is.na(.)) > 0)

  # Parallel Analysis with Dino's 'paran' package decides factor retention
  pa <- paran(trait_cols, iterations = 5000,
              centile = 0, quietly = TRUE,
              status = FALSE, all = TRUE,
              cfa = TRUE, graph = FALSE)

  efa <- psych::fa(trait_cols, pa$Retained)

  # variance-accounted rows, tagged to distinguish them from loadings
  variance_stats <- efa$Vaccounted %>%
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "stat")

  # trait loadings stacked on the variance stats, then reshaped long
  combined <- unclass(efa$loadings) %>%
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "trait") %>%
    bind_rows(variance_stats)

  # gather every factor column (all but the first and last columns)
  gather(combined, "mr", "loading", 2:(ncol(combined) - 1))
}

Calculate for each region

# Run the EFA separately for each region (plus the original O&T 2008 data)
# and stack the long-format results; relabel "MR1" etc. as "Factor 1"
efa_analyses <- data_agg %>%
  bind_rows(ot_data) %>%
  group_by(region) %>%
  nest() %>%
  mutate(efa = map(data, psa_efa)) %>%
  select(-data) %>%
  unnest(efa) %>%
  ungroup() %>%
  mutate(mr = str_replace(mr, "MR", "Factor "))

2.2.1.1 Number of Factors (and proportion variance) by region

Note: many of the analyses will produce warnings in the subset version.

# Number of retained factors (nMRs) and proportion of variance
# explained by each factor, per region
efa_analyses %>%
  filter(rowname == "Proportion Var") %>%
  group_by(region) %>%
  mutate(nMRs = n()) %>%
  ungroup() %>%
  spread(mr, loading) %>%
  select(-rowname, -type) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region nMRs Factor 1 Factor 2 Factor 3 Factor 4 Factor 5
(Oosterhof & Todorov, 2008) 2 0.564 0.227
Africa 3 0.211 0.220 0.141
Asia 3 0.347 0.338 0.112
Australia & New Zealand 3 0.345 0.178 0.285
Central America & Mexico 3 0.239 0.152 0.222
Eastern Europe 3 0.428 0.186 0.158
Middle East 3 0.296 0.191 0.182
Scandinavia 3 0.375 0.176 0.246
South America 4 0.229 0.228 0.192 0.167
UK 3 0.353 0.131 0.279
USA & Canada 5 0.326 0.131 0.166 0.150 0.159
Western Europe 3 0.348 0.247 0.266

2.2.1.2 Trait Loadings by Region and Factor

# Proportion-of-variance values, rounded for the plot annotations below
efa_prop_var <- efa_analyses %>%
  filter(rowname == "Proportion Var") %>%
  select(-rowname, -type) %>%
  mutate(loading = round(loading, 2))

# Heatmap of trait loadings per factor, faceted by region; the extra
# geom_text row (y = 14) shows each factor's proportion of variance.
# Traits use the same ordering as the PCA plot (trait_order).
efa_analyses %>%
  filter(type == "trait") %>%
  select(-type) %>%
  mutate(
    trait = as.factor(rowname),
    trait = factor(trait, levels = c(trait_order, "Prop.Var")),
    loading = round(loading, 2)
  ) %>%
  ggplot() +
  geom_tile(aes(mr, trait, fill=loading), show.legend = F) +
  geom_text(aes(mr, trait, label=sprintf("%0.2f", loading)), color = "black") +
  geom_text(data = efa_prop_var, aes(mr, y = 14, label=sprintf("%0.2f", loading)), color = "black") +
  scale_y_discrete(drop=FALSE) +
  scale_x_discrete(position = "top") + 
  scale_fill_gradient2(low = "dodgerblue", mid = "grey90", high = "#FF3333", limits=c(-1.1, 1.1)) +
  facet_wrap(~region, scales = "fixed", ncol = 4) +
  labs(x = "", y = "", title="") +
  PSA_theme

ggsave("figures/EFA_loadings.png", width = 15, height = 10)

2.2.1.3 Replication Criteria (EFA)

Oosterhof and Todorov’s valence-dominance model will be judged to have been replicated in a given world region if the first factor is correlated strongly (loading > .7) with trustworthiness and weakly (loading < .5) with dominance, and the second factor is correlated strongly (loading > .7) with dominance and weakly (loading < .5) with trustworthiness. All these criteria need to be met to conclude that the model was replicated in a given world region.

# Apply the replication criteria to the EFA solution: Factor 1 loads > .7
# on trustworthy and < .5 on dominant; Factor 2 loads > .7 on dominant
# and < .5 on trustworthy
efa_rep <- efa_analyses %>%
  filter(
    type == "trait", 
    rowname %in% c("trustworthy", "dominant"),
    mr %in% c("Factor 1", "Factor 2")
  ) %>%
  select(-type) %>%
  mutate(rowname = paste(mr, rowname)) %>%
  select(-mr) %>%
  spread(rowname, loading) %>%
  rename(Region = region) %>%
  mutate(Replicated = ifelse(
    `Factor 1 dominant` < .5 & `Factor 1 trustworthy` > .7 & 
    `Factor 2 dominant` > .7 & `Factor 2 trustworthy` < .5,
    "Yes", "No"
  )) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html", col.names = c("Region", "Dominant", "Trustworthy", "Dominant", "Trustworthy", "Replicated")) %>%
  add_header_above(c(" " = 1, "Factor 1" = 2, "Factor 2" = 2, "  " = 1)) %>%
  kable_styling("striped")

save_kable(efa_rep, "figures/EFA_rep_criteria.html")

efa_rep
Factor 1
Factor 2
Region Dominant Trustworthy Dominant Trustworthy Replicated
(Oosterhof & Todorov, 2008) 0.228 0.826 0.970 -0.288 Yes
Africa 0.308 0.541 0.275 -0.432 No
Asia -0.016 0.134 0.793 0.684 No
Australia & New Zealand 0.462 0.626 0.718 -0.269 No
Central America & Mexico 0.165 0.564 0.739 -0.170 No
Eastern Europe 0.768 0.755 0.549 -0.320 No
Middle East 0.189 0.371 0.827 -0.216 No
Scandinavia 0.312 0.677 0.807 -0.279 No
South America 0.233 0.432 0.744 -0.350 No
UK 0.173 0.730 0.844 -0.129 Yes
USA & Canada -0.059 0.784 0.978 0.193 Yes
Western Europe 0.233 0.657 0.806 -0.452 No

2.2.2 Factor Congruence (EFA)

This analysis determines the congruence between the factors from Oosterhof & Todorov (2008) and the factors in each world region, using the psych::factor.congruence function. Congruence is labeled “not similar” for values < 0.85, “fairly similar” for values between 0.85 and 0.95, and “equal” for values >= 0.95.

# get loadings for original O&T2008
# (wide matrix of trait loadings, traits as rownames, used as the
#  comparison target for factor congruence below)
ot2008_efa_loadings <- efa_analyses %>%
  filter(region == "(Oosterhof & Todorov, 2008)", type == "trait") %>%
  select(-region, -type) %>%
  spread(mr, loading) %>%
  column_to_rownames()

# run factor congruence for each region:
# compare each region's first two EFA factors against the O&T 2008 solution.
# Traits are sorted by name so rows align with ot2008_efa_loadings.
fc_efa <- efa_analyses %>%
  filter(type == "trait", region != "(Oosterhof & Todorov, 2008)") %>%
  select(-type) %>%
  spread(mr, loading) %>%
  group_by(region) %>%
  nest() %>%
  mutate(fc = map(data, ~ {
    region_loadings <- .x %>%
      as.data.frame() %>%
      select(rowname, `Factor 1`, `Factor 2`) %>%
      arrange(rowname) %>%
      column_to_rownames()

    psych::factor.congruence(region_loadings,
                             ot2008_efa_loadings,
                             digits = 4) %>%
      as.data.frame() %>%
      rownames_to_column(var = "regionMR")
  })) %>%
  select(-data) %>%
  unnest(fc) %>%
  ungroup()

# One row per region: the congruence coefficient and its similarity label
# for each matched factor (region Factor 1 vs O&T Factor 1, etc.).
# FIX: the original kable header labeled the coefficient columns "Loading",
# but they contain factor-congruence coefficients, not loadings.
mr_fc_table <- fc_efa %>%
  gather(origMR, congruence, `Factor 1`:`Factor 2`) %>%
  mutate(sig = case_when(
           congruence < .85 ~ "not similar",
           congruence < .95 ~ "fairly similar",
           congruence >= .95 ~ "equal"
         ),
         congruence = sprintf("%0.3f", congruence)) %>%
  filter(regionMR == origMR) %>% # keep only matched factor pairs
  select(region, MR = regionMR, congruence, sig) %>%
  gather(k, v, congruence, sig) %>%
  unite(MR, MR, k, remove = T) %>%
  spread(MR, v) %>%
  knitr::kable("html", digits = 3, align = 'lrlrl', 
               col.names = c("Region", "Congruence", "Similarity", "Congruence", "Similarity")) %>%
  add_header_above(c(" " = 1, "Factor 1" = 2, "Factor 2" = 2)) %>%
  kable_styling("striped")


save_kable(mr_fc_table, "figures/EFA_factor_congruence.html")

mr_fc_table
Factor 1
Factor 2
Region Loading Congruence Loading Congruence
Africa 0.759 not similar 0.810 not similar
Asia 0.763 not similar 0.087 not similar
Australia & New Zealand 0.850 not similar 0.969 equal
Central America & Mexico 0.816 not similar 0.978 equal
Eastern Europe 0.890 fairly similar 0.928 fairly similar
Middle East 0.803 not similar 0.785 not similar
Scandinavia 0.875 fairly similar 0.984 equal
South America 0.778 not similar 0.948 fairly similar
UK 0.816 not similar 0.970 equal
USA & Canada 0.664 not similar 0.693 not similar
Western Europe 0.837 not similar 0.942 fairly similar